In this section, each puzzle has a list of nine-letter words, each with three letters provided in either the first three, middle three, or last three positions. We also get a list of three letter blocks that can be used exactly once and must be used in that order.
The first iteration of this, I used the dictionary provided in most Linux systems at '/usr/share/dict/american-english'. Using this as the source of words, the solver was not able to find a match for the first puzzle for the word starting with 'str'. Looking at the available blocks, I guessed the word was 'strongman', and indeed this is missing from the dictionary. (A simple grep
search confirms this.)
So then I turned to a larger corpus provided by nltk.corpus.words
. But this was also missing the word 'strongman'. Finally I used the corpus nltk.corpus.wordnet
. This list does not include many common words, but it does include more uncommon words, and 'strongman' is included.
In [105]:
import functools
import nltk
import pprint
import re
nltk.download('words')
nltk.download('wordnet')
Out[105]:
In [121]:
class BuildingBlocksCandidate():
"""Data structure for holding a single candidate solution to one of the entries.
"""
def __init__(self, word_pattern, word, blocks):
"""
Args:
word_pattern: A string with some letters provided and the blanks represented by '.'
word: A word that matches `word_pattern` and the missing letters from `blocks`.
blocks: A list of strings used to complete `word` based on `word_pattern` and the
complete list of blocks.
"""
self.word_pattern = word_pattern
self.word = word
self.blocks = blocks
def __repr__(self):
return f'BuildingBlocksCandidate(word_patter="{self.word_pattern}", word="{self.word}", blocks={self.blocks})'
class BuildingBlocksEntry():
"""Data structure for holding a puzzle entry. It maps a `word_pattern` to a list of `BuildingblocksCandidate` objects.
A `BuildingBlocksEntry` can have zero or many `BuildingBlocksCandidate` objects.
"""
def __init__(self, word_pattern, candidates=None):
"""
Args:
word_pattern: A string with some letters provided and the blanks represented by '.'
candidates: If None, construct this object with an initial empty list of candidates.
Otherwise, provide the list of `BuildingBlocksCandidate` objects.
"""
self.word_pattern = word_pattern
if candidates is None:
self.candidates = []
else:
self.candidates = candidates
def __repr__(self):
return f'BuildingBlocksEntry(word_pattern="{self.word_pattern}", candidates={self.candidates})'
def add_candidate(self, candidate):
"""Append a `BuildingBlocksCandidate` object to the list of candidates.
Args:
candidate: `BuildingBlocksCandidate` object to add.
"""
self.candidates.append(candidate)
def get_candidates(self):
"""Get the list of candidates.
Returns:
A list of `BuildingBlocksCandidate` objects.
"""
return self.candidates
class BuildingBlocksPuzzle():
"""Represents an entire puzzle.
"""
def __init__(self, word_patterns, blocks, solver=None, word_provider=None):
"""
Args:
word_patterns: A list of strings, each with some letters provided and the blanks represented by '.'
Each pattern must be the same length, have the same block length, and have the same position for
the provided words.
blocks: A list of strings to use for constructing the words given the `word_patterns`
solver: An object of class derived from `ConstraintSolverBase` used for solving the puzzle.
This allows easy experimentation of different solvers.
If None, the default solver `ConstraintSolverSimple` is used.
word_provider: An object of class derived from `WordProviderBase` used for providing a list of
words in the vocabulary.
If None, the default `WordProviderNltkWordnet` is used.
"""
self.word_patterns = word_patterns
self.blocks = blocks
if solver is None:
self.solver = ConstraintSolverSimple()
if word_provider is None:
self.word_provider = WordProviderNltkWordnet()
self.word_length = BuildingBlocksPuzzle.get_item_length(word_patterns)
self.block_length = BuildingBlocksPuzzle.get_item_length(blocks)
def solve(self, verbose=False):
candidate_dct, used_blocks, unused_blocks = self.solver.solve(
self.word_patterns,
self.blocks,
self.word_provider,
verbose=verbose,
)
if verbose:
BuildingBlocksPuzzle.print_solution(candidate_dct, unused_blocks)
return candidate_dct, used_blocks, unused_blocks
@staticmethod
def get_item_length(items):
"""Helper function to determine the length of the words or blocks.
If the lengths of the items are not uniform, a `ValueError` will be raised."""
lengths = [len(b) for b in items]
if min(lengths) != max(lengths):
raise ValueError('The length of the items must be the same.')
return min(lengths)
@staticmethod
def get_re_pattern(word_pattern, blocks):
"""Given the `word_pattern` of the form 'abc......' or '...abc...' or '......abc'
and the available blocks, return the regular expression pattern to compile.
"""
re_pattern = '^' # start of word
block_length = BuildingBlocksPuzzle.get_item_length(blocks)
# Fill the blanks with regexp match patterns.
for idx in range(0, len(word_pattern), block_length):
chunk = word_pattern[idx:idx+block_length]
if chunk == '.' * block_length:
# Matching group with all the strings from `blocks` or'ed.
pattern = '({})'.format('|'.join(blocks)) # e.g., (abc|def|ghi)
# pattern = f'(\w{{{block_length}}})' # e.g., '(\w{3})' # Useful pattern for debugging.
re_pattern += pattern
elif '.' in chunk:
raise ValueError('Chunks should be all letters or all ".", instead got {}'.format(chunk))
else:
re_pattern += chunk
re_pattern += '$' # end of word
return re_pattern
@staticmethod
def print_solution(candidate_dct, unused_blocks):
for word_pattern, entry in candidate_dct.items():
candidates = entry.get_candidates()
candidate = candidates[0]
print(candidate.word_pattern, candidate.word, candidate.blocks)
print('Unused blocks:', unused_blocks)
class WordProviderBase():
def words(self):
raise NotImplementedError('Implement this method!')
class WordProviderNltkWords(WordProviderBase):
@functools.lru_cache(maxsize=128)
def words(self):
return nltk.corpus.words.words()
class WordProviderNltkWordnet(WordProviderBase):
@functools.lru_cache(maxsize=128)
def words(self):
return list(nltk.corpus.wordnet.words())
class WordProviderFile(WordProviderBase):
@functools.lru_cache(maxsize=128)
def words(self):
with open('/usr/share/dict/american-english', 'r') as f:
return f.read().splitlines()
class ConstraintSolverBase():
def solve(self, word_patterns, blocks, word_provider, verbose=False):
"""
Returns a tuple of three items:
- A dictionary mapping the word pattern to a `BuildingBlockEntry` with a single candidate
- A set of used blocks
- A set of unused blocks
"""
candidate_dct = {}
for word_pattern in word_patterns:
candidate_dct[word_pattern] = self.find_candidates(word_pattern, blocks, word_provider)
# if verbose:
# print('Candidate words:')
# pprint.pprint(candidate_dct)
used_blocks = set([])
unused_blocks = set(blocks)
for word_pattern, entry in candidate_dct.items():
for candidate in entry.get_candidates():
blks = candidate.blocks
used_blocks.update(set(blks))
unused_blocks.difference_update(set(blks))
return candidate_dct, used_blocks, unused_blocks
@staticmethod
def find_candidates(word_pattern, blocks, word_provider):
"""Return a `BuildingBlocksEntry` that fits the `word_pattern` and the `blocks`.
This does not apply any constraint logic.
"""
word_length = len(word_pattern)
candidates = {}
pattern = BuildingBlocksPuzzle.get_re_pattern(word_pattern, blocks)
p = re.compile(pattern)
words = word_provider.words()
entry = BuildingBlocksEntry(word_pattern)
for word in words:
# word = word.strip().lower()
if len(word) != word_length:
continue
matches = p.finditer(word)
if matches:
for match in matches:
blocks_used = tuple(match.groups())
entry.add_candidate(BuildingBlocksCandidate(word_pattern, word, blocks_used))
return entry
class ConstraintSolverSimple(ConstraintSolverBase):
"""Simple strategy that makes some broad assumptions:
1) Each word pattern is mapped to exactly one candidate.
2) Each block is used at most one time.
"""
def solve(self, word_patterns, blocks, word_provider, verbose=False):
candidate_dct, used_blocks, unused_blocks = super().solve(word_patterns, blocks, word_provider, verbose=verbose)
# ensure each word pattern is mapped to exactly one candidate
error_messages = []
for word_pattern, entry in candidate_dct.items():
if len(entry.get_candidates()) != 1:
error_messages.append(f'Expected exactly one candidate for {word_pattern}, found {entry.get_candidates()}')
if len(error_messages) > 0:
print(error_messages)
raise ValueError('Constraints not met.')
return candidate_dct, used_blocks, unused_blocks
class ConstraintSolverSearch(ConstraintSolverBase):
"""A solver that can handle situations where there are more than one candidate for a word pattern.
The strategy is to treat each candidate within an entry as a node, then approach the problem as
a graph search with constraint propagation. The algorithm goes like this:
- Order the entries from least number of candidates to most.
- While we have not satisfied all the constraints
- Visit the next available entry
- Push the next available candidate on the stack, considering the blocks already used.
- Add the blocks for the candidate to the list of used blocks.
- If no candidates are available, pop the candidate from the stack.
# A more complex solver would first settle all the clues with a single candidate,
# then search the remaining solution spaces.
# This can be implemented as a search where we order the nodes for visiting from
# the least number of candidates to the most. After visting each node and "locking"
# it, we update the others to remove any candidates using any of the blocks just locked.
"""
def solve(self, word_patterns, blocks, word_provider, verbose=False):
candidate_dct, used_blocks, unused_blocks = super().solve(word_patterns, blocks, word_provider, verbose=verbose)
# TODO: pseudocode for now
blocks_used = set([])
for word_pattern, entry in sorted(candidate_dct.items(), key=lambda e: (e.n_candidates, entry.word)):
candidates = [candidate for candidate in entry.get_candidates() if blocks_used.is_disjoint(set(candidate.blocks))]
#
return candidate_dct, used_blocks, unused_blocks
p1 = BuildingBlocksPuzzle(
word_patterns=[
'pol......', 'imb......', 'por......', 'str......',
'veg......', 'con......', 'app......', 'wee......',
],
blocks=[
'ble', 'cal', 'cup', 'emi',
'eta', 'ght', 'hig', 'hti',
'ine', 'ion', 'kni', 'lia',
'lio', 'man', 'mes', 'nce',
'ong', 'rog', 'tag',
],
)
candidate_dct, used_blocks, unused_blocks = p1.solve(verbose=True)
In [ ]:
In [100]:
p2 = BuildingBlocksPuzzle(
word_patterns=[
'...ilo...', '...bfo...', '...irw...', '...mis...',
'...lic...', '...erb...', '...per...', '...tai...',
],
blocks=[
'aff', 'cer', 'cha', 'com',
'dum', 'ean', 'ell', 'ium',
'nom', 'nty', 'one', 'quy',
'sar', 'sno', 'sol', 'sta',
'ted', 'und', 'ytt',
],
)
candidate_dct, used_blocks, unused_blocks = p2.solve(verbose=True)
In [101]:
p3 = BuildingBlocksPuzzle(
word_patterns=[
'......lly', '......ang', '......day', '......ide',
'......ish', '......chy', '......ous', '......ast',
],
blocks=[
'abl', 'aci', 'akf', 'bli',
'boo', 'bre', 'efu', 'ego',
'est', 'gar', 'hop', 'ing',
'mer', 'nds', 'nic', 'oli',
'ter', 'viv', 'yes',
],
)
candidate_dct, used_blocks, unused_blocks = p3.solve(verbose=True)
In [ ]:
In [122]:
%time candidate_dct, used_blocks, unused_blocks = p1.solve()
In [123]:
%prun p1.solve()
In [126]:
%load_ext line_profiler
In [129]:
%lprun -f BuildingBlocksPuzzle.solve -f ConstraintSolverBase.find_candidates p1.solve()
In [124]:
%load_ext memory_profiler
In [130]:
%memit p1.solve()
In [ ]: